#########################################
#### First build a master matrix which determines the distance between zip codes

# Load the master list of zip codes together with their coordinates.
ResZip <- read.csv("ResDistance_ReSTAT.csv")

# Attach the fossil package, which provides earth.dist().
library('fossil')

# Pairwise distance between every pair of rows of ResZip.
# earth.dist() takes longitude in the first column and latitude in the second;
# dist = FALSE asks for a full square matrix rather than a "dist" object.
# No preallocation is needed: the result of earth.dist() is assigned directly
# (the earlier matrix(NaN, 1437, 1437) was overwritten immediately).
distmatrix <- earth.dist(ResZip[c('longitude', 'latitude')], dist = FALSE)

#########################################
#### Then map the distances onto each set of peers
ReSTATzip1 <- ResZip$zip1
ResZip1 <- read.csv("ResDistance1_ReSTAT.csv")

# For each peer row, the row/column of distmatrix holding its zip code
# (NA when the peer's zip code is not in the master list).
key <- match(ResZip1$zip1, ReSTATzip1)

# Dimension of the peer matrix. Kept as a fixed constant so the saved matrix
# has the same shape as before.
N <- 35536
if (length(key) != N) {
  warning(
    sprintf("ResDistance1_ReSTAT.csv has %d rows but N is %d", length(key), N),
    call. = FALSE
  )
}

# Entries stay NaN wherever either peer has no matching zip code.
peerdistmatrix <- matrix(NaN, N, N)

# Indexing past the end of key yields NA, so peers beyond the rows actually
# read are treated as unmatched; rows beyond N are ignored.
peer_key <- key[seq_len(N)]
matched <- which(!is.na(peer_key))
matched_key <- peer_key[matched]

# Fill one column at a time with vectorised indexing. This yields the same
# values as an element-by-element double loop, without printing every index
# pair, and without building a second N x N temporary matrix.
system.time(
  for (j in matched) {
    peerdistmatrix[matched, j] <- distmatrix[matched_key, peer_key[j]]
  }
)

## Rescale the distances to miles (0.621371 miles per kilometre; this
## presumes the values are in kilometres, as reported by fossil::earth.dist).
peerdistmatrix <- 0.621371 * peerdistmatrix

## Persist the peer-by-peer distance matrix for later use.
save(list = "peerdistmatrix", file = "peerdistmatrix_ReSTAT.RData")

# find median distance among overlapping kids
#peermatrix1=matrix(as.numeric(peermatrix>0 & is.na(peermatrix)==F),35536,35536)
#peermatrix1[peermatrix1==0]=NaN
#peerdistmatrix=peermatrix1*peerdistmatrix
#sum(is.na(peerdistmatrix))
#median(peerdistmatrix)
# median number of miles between overlapping peers is 102




